%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                              MO-SAStrE
%            Multiobjective Optimization for Sequence Alignments 
%                      based on STRuctural Evaluations
%
% - INPUT_ALIGNMENTS: Vector with paths of alignments to optimize.
%
% - FILES_TYPES: (Optional) Type of the files passed in the FILES
%                 argument. It can be:
%                 1) "STRUCTURES" if a list of structure files is included.
%                 2) "CONTACTS" if a contact file is included.
%
% - FILES: (Optional) Vector of files holding one of these two options,
%                     according to the type specified in FILES_TYPES:
%                     1) List of PDB structure files of the sequences.
%                     2) Contact file listing, for each sequence, the pairs
%                        of amino acids (given by their positions in the
%                        sequence) that are in contact.
%                     If FILES is omitted, a PDB structure is downloaded
%                     for each header of the first alignment.
%
% Francisco M. Ortuño Guzmán
% Department of Computer Architecture and Computer Technology
% University of Granada
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function alignment = MOSAStrE(input_alignments, files_types, files)
% Optimizes the given alignments with a multiobjective genetic algorithm.
%
% Returns ALIGNMENT, a struct array with fields 'Header' and 'Sequence'
% built from the solution with the lowest value of the first objective.
% ALIGNMENT is empty ([]) when the first alignment cannot be read or when
% FILES_TYPES is not recognised.

% Assign the output up front so the early-return error paths below never
% leave it undefined (which would raise an error in the caller).
alignment = [];

% Parameters to configure
max_num_poblations = 500;
percent_crossover = 0.8;
num_chromosomes = 100;

% Accept a path given as a plain string as well as a cell array of paths;
% indexing a char array element by element would yield single characters.
if ischar(input_alignments)
    input_alignments = cellstr(input_alignments);
end
if nargin >= 3 && ischar(files)
    files = cellstr(files);
end

% Format input_alignments struct
inputs = struct('file', []);
for i=1:length(input_alignments)
    inputs(i).file = input_alignments(i);
end

% Read headers from the first alignment
try
    [headers, msaseq] = multialignread(char(input_alignments(1)));
catch
    fprintf('The first alignment file cannot be read. Check that the file and path are correct.');
    return;
end

% contacts holds one row per contact; contact_seqs holds, row by row, the
% index (into headers) of the sequence each contact belongs to.
contact_seqs = [];
contacts = [];

% If files are not included, structures are downloaded
if(nargin<3)
    for i=1:length(headers)
        seqName = headers(i);
        % The name is passed as data, never as part of the format string,
        % so '%' or '\' characters in it are printed literally.
        fprintf('Downloading %s PDB structure...', char(seqName));
        % Download pdb structure (the structure name must be in headers)
        PDBstruct = download_pdb(seqName);

        % Calculate contacts to evaluate alignment with STRIKE score
        if(~isempty(PDBstruct))
            fprintf('Calculating contacts...');
            [non_used, seqNumber] = ismember(seqName, headers);
            sequence = char(msaseq(seqNumber));
            sequence = sequence(sequence~='.');
            new_contacts = calculate_contacts(PDBstruct,seqName,sequence);
            contacts = [contacts; new_contacts];
            % One label per contact row: size(...,1) counts rows, whereas
            % length() would return the column count for a single row.
            contact_seqs = [contact_seqs; repmat(seqNumber,size(new_contacts,1),1)];
            fprintf('DONE\n');
        end
    end

% If files are included, check if they are STRUCTURES or CONTACTS
else
    if(strcmpi(files_types,'STRUCTURES'))
        for i=1:length(files)
            % Read pdb structure if the files are given
            fprintf('Reading %s PDB structure...', char(files(i)));
            PDBstruct = pdbread(char(files(i)));
            fprintf('DONE\n');

            % Find the header that starts with the PDB id (case-insensitive)
            idCode = PDBstruct.Header.idCode;
            seqNumber = find(strncmpi(headers, idCode, length(idCode)));
            if(isempty(seqNumber))
                % No sequence to map the contacts onto: skip this structure
                fprintf('No header starts with "%s". Structure skipped.\n', idCode);
                continue;
            end
            % NOTE(review): when several headers share the same PDB id the
            % first one is used - confirm this is the intended sequence.
            seqNumber = seqNumber(1);
            seqName = headers(seqNumber);

            % Calculate contacts to evaluate alignment with STRIKE score
            fprintf('Calculating contacts...');
            sequence = char(msaseq(seqNumber));
            sequence = sequence(sequence~='.');
            new_contacts = calculate_contacts(PDBstruct,seqName,sequence);
            contacts = [contacts; new_contacts];
            contact_seqs = [contact_seqs; repmat(seqNumber,size(new_contacts,1),1)];
            fprintf('DONE\n');
        end

    elseif(strcmpi(files_types,'CONTACTS'))
        % Read contacts from file: sequence name and two positions per line
        fprintf('Reading contacts...');
        contacts = [];
        [contact_names, contacts(:,1), contacts(:,2)] = textread(files{1}, '%s %d %d\n');
        [found, contact_seqs] = ismember(contact_names, headers);
        fprintf('DONE\n');

        % Contacts whose sequence name is not an alignment header would
        % keep index 0, which does not refer to any sequence: drop them.
        if(~all(found))
            fprintf('%d contacts refer to sequences that are not in the alignment and were ignored.\n', sum(~found));
            contacts = contacts(found,:);
            contact_seqs = contact_seqs(found);
        end

    else
        fprintf('The file type argument is not correct. Please, include "STRUCTURES" or "CONTACTS".');
        return;
    end

end

% Definition of my fitness function
FitnessFcn = @(x) fitness_multi_strike(x,contact_seqs,contacts);

% Definition of first poblation function
my_create_population = @(NVARS, FitnessFcn,options) create_population(NVARS, FitnessFcn, options, inputs, headers);

% Options of the multiobjective genetic algorithm
options = gaoptimset('PopulationType', 'custom');
options = gaoptimset(options, 'CreationFcn',my_create_population, ...
'CrossoverFcn',@crossover, 'CrossoverFraction', percent_crossover, ...
'MutationFcn',@mutation, ...
'ParetoFraction',0.3, ...
'Generations',max_num_poblations,'PopulationSize',num_chromosomes, ...
'StallGenLimit',200,'Vectorized','on','TolFun',0);

% Run the multiobjective algorithm
fprintf('Running MO-SAStrE...\n');
[x,fval] = gamultiobj(FitnessFcn,3,[],[],[],[],[],[],options);
fprintf('DONE\n');

% Retrieve the best alignment (according to the STRIKE fitness), i.e. the
% solution with the minimum value in the first objective column
[no_used, index_strike] = min(fval(:,1));
alignment = struct('Header',headers,'Sequence',cellstr(x{index_strike(1)}.msa));